helper function to process time series

And some package and labeling info

library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(kohonen)
library(vegan)  # for vegdist function which gives a dissimilarity index
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.6-4
timeseries_dir <- 'extracted_timeseries/'

# get ecs ordering/labels
esm_labels <- read.csv(paste0(timeseries_dir,'global_tas_allesms.csv'), stringsAsFactors = FALSE) %>%
  select(esm) %>% distinct %>% 
  mutate(plotesm = paste0(letters[as.integer(row.names(.))], '.', esm),
         ECS_order = as.integer(row.names(.)))
print(esm_labels)
##              esm         plotesm ECS_order
## 1    CAMS-CSM1-0   a.CAMS-CSM1-0         1
## 2         MIROC6        b.MIROC6         2
## 3      GFDL-ESM4     c.GFDL-ESM4         3
## 4      FGOALS-g3     d.FGOALS-g3         4
## 5  MPI-ESM1-2-HR e.MPI-ESM1-2-HR         5
## 6  MPI-ESM1-2-LR f.MPI-ESM1-2-LR         6
## 7     MRI-ESM2-0    g.MRI-ESM2-0         7
## 8  ACCESS-ESM1-5 h.ACCESS-ESM1-5         8
## 9   IPSL-CM6A-LR  i.IPSL-CM6A-LR         9
## 10   CESM2-WACCM   j.CESM2-WACCM        10
## 11   UKESM1-0-LL   k.UKESM1-0-LL        11
## 12       CanESM5       l.CanESM5        12
process_time_series <- function(time_series_df, esm_label_info,
                                hist_start = 1995, hist_end = 2014){
  # Get ensemble member values for projection runs:
  # 1. 2080-2099 average
  # 2. loess detrend each ensemble member to get IAV
  #
  # split by run
  non_hist <- time_series_df %>% 
    filter(experiment != 'historical') %>% 
    select(year, ann_agg, esm, experiment, ensemble, variable, region)
  
  grouped <- split(non_hist, f = list(non_hist$esm, 
                                      non_hist$experiment,
                                      non_hist$ensemble, 
                                      non_hist$variable ,
                                      non_hist$region) )
  # split creates group of every possible combo of the variables and fills in
  # empty dataframes for the ones that don't exist in data. Drop those
  grouped <- grouped[lapply(grouped, nrow)>0]
  
  processed_groups <- lapply(grouped, FUN = function(run_df){
    loess_resids <- loess(run_df$ann_agg ~ run_df$year)$residuals
    
    run_df %>%
      filter(year >= 2080, year <= 2099) %>%
      group_by(esm, experiment, ensemble, variable, region) %>%
      summarise(average_2080_2099 = mean(ann_agg)) %>% 
      ungroup  %>%
      mutate(iasd = sd((loess_resids))) ->
      output_df
    
    return(output_df)
    
  })
  
  individual_stats <- do.call(bind_rows, processed_groups)
  rm(non_hist)
  rm(grouped)
  rm(processed_groups)
  
  # calculate ensemble averages
  individual_stats %>%
    group_by(esm, experiment, variable,region) %>%
    summarise(average_2080_2099 = mean(average_2080_2099),
              iasd = mean(iasd)) %>%
    ungroup ->
    ensemble_stats
  
  
  # get ensemble average historical average value:
  time_series_df %>%
    filter(experiment == 'historical',
           year >= hist_start,
           year <= hist_end) %>%
    group_by(esm, experiment, ensemble, variable, region) %>%
    summarise(historical_average = mean(ann_agg)) %>%
    ungroup %>%
    group_by(esm, experiment, variable, region) %>%
    summarise(historical_average = mean(historical_average)) %>%
    ungroup %>%
    select(-experiment) ->
    historical_ens
  
  
  # shape and calculate changes for plotting:
  ensemble_stats %>%
    left_join(historical_ens, by = c('esm', 'variable', 'region')) %>%
    left_join(esm_label_info, by = 'esm') %>%
    mutate(change = average_2080_2099 - historical_average,
           pct_change = 100*(average_2080_2099 - historical_average)/historical_average) ->
    plot_tbl
  
return(plot_tbl)
}


process_ens_time_series <- function(time_series_df, esm_label_info,
                                hist_start = 1995, hist_end = 2014){
  # Get ensemble member values for projection runs:
  # 1. 2080-2099 average
  # 2. loess detrend each ensemble member to get IAV
  #
  # split by run
  non_hist <- time_series_df %>% 
    filter(experiment != 'historical') %>% 
    select(year, ann_agg, esm, experiment, ensemble, variable, region)
  
  grouped <- split(non_hist, f = list(non_hist$esm, 
                                      non_hist$experiment,
                                      non_hist$ensemble, 
                                      non_hist$variable ,
                                      non_hist$region) )
  # split creates group of every possible combo of the variables and fills in
  # empty dataframes for the ones that don't exist in data. Drop those
  grouped <- grouped[lapply(grouped, nrow)>0]
  
  processed_groups <- lapply(grouped, FUN = function(run_df){
    loess_resids <- loess(run_df$ann_agg ~ run_df$year)$residuals
    
    run_df %>%
      filter(year >= 2080, year <= 2099) %>%
      group_by(esm, experiment, ensemble, variable, region) %>%
      summarise(average_2080_2099 = mean(ann_agg)) %>% 
      ungroup  %>%
      mutate(iasd = sd((loess_resids))) ->
      output_df
    
    return(output_df)
    
  })
  
  individual_stats <- do.call(bind_rows, processed_groups)
  rm(non_hist)
  rm(grouped)
  rm(processed_groups)
  
  # calculate ensemble averages
  individual_stats ->
    ensemble_stats
  
  
  # get ensemble average historical average value:
  time_series_df %>%
    filter(experiment == 'historical',
           year >= hist_start,
           year <= hist_end) %>%
    group_by(esm, experiment, ensemble, variable, region) %>%
    summarise(historical_average = mean(ann_agg)) %>%
    ungroup %>%
    group_by(esm, experiment, variable, region) %>%
    summarise(historical_average = mean(historical_average)) %>%
    ungroup %>%
    select(-experiment) ->
    historical_ens
  
  
  # shape and calculate changes for plotting:
  ensemble_stats %>%
    left_join(historical_ens, by = c('esm', 'variable', 'region')) %>%
    left_join(esm_label_info, by = 'esm') %>%
    mutate(change = average_2080_2099 - historical_average,
           pct_change = 100*(average_2080_2099 - historical_average)/historical_average) ->
    plot_tbl
  
return(plot_tbl)
}

prep_esm_TP_data <- function(esmname){
  
  region_timeseries <- read.csv(paste0(timeseries_dir, 'IPCC_land_regions_tas_', esmname, '_timeseries_1980_2099.csv'),
                              stringsAsFactors = FALSE) %>% mutate(region = acronym)
  
  region_tas_summary <- suppressMessages(process_time_series(time_series_df = region_timeseries, esm_label_info = esm_labels))
  
  region_pr_summary <- suppressMessages(process_time_series(time_series_df = 
                                                            read.csv(paste0(timeseries_dir, 'IPCC_land_regions_pr_', esmname,'_timeseries_1980_2099.csv'),
                                                                     stringsAsFactors = FALSE) %>%
                                                            rename(ann_agg=pr) %>% 
                                                            mutate(region = acronym) ,
                                                          esm_label_info = esm_labels))
  
  # reshape so each row is an observation
  # observation = esm - experiment - region - tas change-tas iasd - pr pct change
  region_tas_summary %>% 
    select(esm, experiment, region, iasd, change) %>%
    rename(tas_change = change) %>%
    left_join(region_pr_summary %>%
                select(esm, experiment, region, pct_change) %>% 
                rename(pr_pct = pct_change),
              by = c('esm', 'experiment', 'region')) ->
    region_summary

return(region_summary)
  
}

load for an ESM

Consider an observation = esm - experiment - region : tas change-tas iasd - pr pct change

# region_summary <- prep_esm_TP_data(esmname =  'GFDL-ESM4')
# region_summary %>%
#   bind_rows(prep_esm_TP_data(esmname =  'CESM2-WACCM')) %>%
#    bind_rows(prep_esm_TP_data(esmname =  'CAMS-CSM1-0'))->
#   region_summary_main
# write.csv(region_summary_main, 'CAMS_GFDL_CESM2-WACCM_summaries.csv', row.names = F)
region_summary_main <- read.csv('CAMS_GFDL_CESM2-WACCM_summaries.csv', stringsAsFactors = FALSE)
# make a copy to operate on
region_summary <- region_summary_main
print(head(region_summary))
##         esm experiment region      iasd tas_change     pr_pct
## 1 GFDL-ESM4     ssp119    ARP 0.2652738  0.2610944  2.9927902
## 2 GFDL-ESM4     ssp119    CAF 0.2703431  0.5037959 -1.9838156
## 3 GFDL-ESM4     ssp119    CAR 0.1574689  0.2216722 -0.3187443
## 4 GFDL-ESM4     ssp119    CAU 0.5345113  0.4218907 -3.8398722
## 5 GFDL-ESM4     ssp119    CNA 0.4910504  0.9446315  6.3459839
## 6 GFDL-ESM4     ssp119    EAS 0.2584605  0.7250376 10.5204045

Spatial info

default SOM packages can only operate on numerical data, not categorical. So we have to assign some amount of spatial location numerical info to each acronym. Ideally mean lat-lon in the shape?

Also we want the shapes for plotting anyway, so prep them

library(sf)
## Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE
shp <- st_read(dsn = 'IPCC-WGI-reference-regions-v4_shapefile/IPCC-WGI-reference-regions-v4.shp', stringsAsFactors = F)
## Reading layer `IPCC-WGI-reference-regions-v4' from data source 
##   `/Users/snyd535/Documents/GitHub/stitches_in_r/R/inst/shinyApp/python_curation/IPCC-WGI-reference-regions-v4_shapefile/IPCC-WGI-reference-regions-v4.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 58 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -180 ymin: -90 xmax: 180 ymax: 90
## Geodetic CRS:  WGS 84
# add a numerical region id
shp %>% 
  mutate(region_id = as.integer(row.names(.))) ->
  shp

# add coordinate info probably
shp1 <-  st_transform(shp, "+proj=longlat +ellps=WGS84 +datum=WGS84")

# extract
coords <- as.data.frame(st_coordinates(shp1))


# get a mean lon and lat value in each shape
coords %>%
  rename(lon = X, lat = Y, region_id = L3) %>%
  left_join(as.data.frame(shp) %>% select(region_id, Acronym), by = 'region_id') %>%
  filter(grepl('PO', Acronym)) %>% 
  # have to have lon on 0:360 so th pacific ocean behaves even though not
  # looking at that here
  mutate(lon_360 = if_else(lon >=0, lon, lon+360))%>%
  group_by(region_id) %>%
  summarise(mean_lon = mean(lon_360),
            mean_lat = mean(lat)) %>%
  ungroup  %>%
  mutate(mean_lon = if_else(mean_lon >= 0 & mean_lon <= 180, 
                            mean_lon, mean_lon - 360) ) ->
  mean_pts_PO

coords %>%
  rename(lon = X, lat = Y, region_id = L3) %>%
  left_join(as.data.frame(shp) %>% select(region_id, Acronym), by = 'region_id') %>%
  filter(!grepl('PO', Acronym)) %>% 
  # have to have lon on 0:360 so th pacific ocean behaves even though not
  # looking at that here
  group_by(region_id) %>%
  summarise(mean_lon = mean(lon),
            mean_lat = mean(lat)) %>%
  ungroup  %>% 
  bind_rows(mean_pts_PO)->
  mean_pts 
# Join to the shape file and make sure this very simple way of
# doing things ends up with a lon lat that is actually in each region
shp %>%
  left_join(mean_pts, by = 'region_id') ->
  shp

ggplot() +
  geom_sf(data = shp  ) +
  geom_point(data = shp, mapping = aes(x = mean_lon, y = mean_lat), color = 'red') +
  geom_text(data = shp, mapping = aes(label = Acronym, x = mean_lon, y= mean_lat), size =5)

test - convert to rgb first

we have 3 variables per observarion = experimentXregion -> convert to rgb values

  • looking across ESMs, we’ll want to make sure we have them all on consistent range before converting to (0,255)

  • each color/family of colors does have a physical interpretation in terms of iasd, t, p

  • red = temperature

  • blue = precip

  • iasd = green

# convert to RGB
region_summary %>%
  filter(experiment != 'ssp119') ->
  region_summary 
region_summary$r <- scales::rescale(region_summary$tas_change, to =c(0,255))
region_summary$g <- scales::rescale(region_summary$iasd, to =c(0,255))
region_summary$b <- scales::rescale(region_summary$pr_pct, to =c(0,255))


# add spatial numerical info
region_summary %>%
  left_join(as.data.frame(shp) %>% select(Acronym, mean_lon, mean_lat), by = c('region' = 'Acronym')) %>%
  rename(lon  = mean_lon, lat = mean_lat) %>%
  # add the original colors 
  mutate(orig_color = rgb(.$r, .$g, .$b, maxColorValue  = 255)) ->
  region_summary

region_numerical <- as.data.frame(region_summary[c('lon', 'lat', 'r', "g", "b")])

3d plot to get a sense of the legend

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
fig <- plot_ly(region_summary, x = ~tas_change, y = ~iasd, z = ~pr_pct, color = ~orig_color,
               colors = region_summary$orig_color)
fig
## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
fig <- plot_ly(region_summary %>% filter(experiment == 'ssp126'), x = ~tas_change, y = ~iasd, z = ~pr_pct, color = ~orig_color,
               colors = (region_summary %>% filter(experiment == 'ssp126'))$orig_color)
fig
## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
fig <- plot_ly(region_summary %>% filter(experiment == 'ssp245'), x = ~tas_change, y = ~iasd, z = ~pr_pct, color = ~orig_color,
               colors = (region_summary %>% filter(experiment == 'ssp245'))$orig_color)
fig
## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
fig <- plot_ly(region_summary %>% filter(experiment == 'ssp370'), x = ~tas_change, y = ~iasd, z = ~pr_pct, color = ~orig_color,
               colors = (region_summary %>% filter(experiment == 'ssp370'))$orig_color)
fig
## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
fig <- plot_ly(region_summary %>% filter(experiment == 'ssp585'), x = ~tas_change, y = ~iasd, z = ~pr_pct, color = ~orig_color,
               colors = (region_summary %>% filter(experiment == 'ssp585'))$orig_color)
fig
## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
  • blues: relatively small temperature changes; more green = more variability; hard to gauge the blue depth corresponding to increasing or decreasing pr with how setup - reds: much larger temperature changes

SOM

set.seed(11)
#### train the SOM ####
sample.size <- nrow(region_numerical)
print(paste('Num Observations is', sample.size ))
## [1] "Num Observations is 516"
## define a grid for the SOM and train
grid.size <-  ceiling(sample.size ^ (1/2.5))
som.grid <- somgrid(xdim = grid.size, ydim = grid.size, topo = 'hexagonal', toroidal = F)
som.model <- som(data.matrix(region_numerical), grid = som.grid)
# extract some values to be useful
som.events <- som.model$codes[[1]]

# assign a color to each event because we have 3 metrics
# red = iasd
# green = temperature change
# blue = precip change
som.events.colors <- rgb( som.events[,3],                 
                          som.events[,4],               
                          som.events[,5],                 
                          maxColorValue = 255)

# calculate a distance matrix
som.dist <- as.matrix(dist(som.events))
print(head(som.events))
##         lon        lat         r        g        b
## V1  99.5254  59.167376 163.12832 141.3639 196.3608
## V2 133.8064  55.852860 110.43912 125.9292 168.7815
## V3 150.7777  54.981011  61.90710 112.5471 144.7106
## V4 143.0164  -7.773551  53.72205 122.8078 136.4233
## V5 138.3231 -26.048733  55.77659 137.2779 135.6411
## V6 123.2724  -1.249687  80.17123 148.4979 153.1180
p1<- ggplot() +
  geom_sf(data = shp  ) +
  geom_text(data = shp, mapping = aes(label = Acronym, x = mean_lon, y= mean_lat), size =3) +
  geom_point(data = as.data.frame(som.events), mapping = aes(x = lon, y = lat, color = interaction(r,g,b)), alpha = 0.75, size = 3) +
  scale_color_manual(values = som.events.colors) +
  theme(legend.position = 'none') + 
  ggtitle('CAMS, GFDL and CESM2-WACCM')
p1

The default plotting starts bottom left and populates the SOMs in the order they came out/are in in events

plot(som.model,
     type = 'mapping',
     bg = som.events.colors,
     keepMargins = F,
     col = NA,
     main = '')

test the same thing again for robustness

#### train the SOM ####
sample.size <- nrow(region_numerical)
print(paste('Num Observations is', sample.size ))
## [1] "Num Observations is 516"
## define a grid for the SOM and train
grid.size <-  ceiling(sample.size ^ (1/2.5))
som.grid <- somgrid(xdim = grid.size, ydim = grid.size, topo = 'hexagonal', toroidal = F)
som.model1 <- som(data.matrix(region_numerical), grid = som.grid)
# extract some values to be useful
som.events1 <- som.model1$codes[[1]]

# assign a color to each event because we have 3 metrics
# red = iasd
# green = temperature change
# blue = precip change
som.events.colors1 <- rgb( som.events1[,3],                 
                          som.events1[,4],               
                          som.events1[,5],                 
                          maxColorValue = 255)

# calculate a distance matrix
som.dist1 <- as.matrix(dist(som.events1))
p2<- ggplot() +
  geom_sf(data = shp  ) +
  geom_text(data = shp, mapping = aes(label = Acronym, x = mean_lon, y= mean_lat), size =3) +
  geom_point(data = as.data.frame(som.events1), mapping = aes(x = lon, y = lat, color = interaction(r,g,b)), alpha = 0.75, size = 3) +
  scale_color_manual(values = som.events.colors1) +
  theme(legend.position = 'none')+ 
  ggtitle('CAMS, GFDL and CESM2-WACCM')
p1

p2

ggsave(paste0('python_curation_figs/CAMS_GFDL_CESM2-WACCM_som1_grid', grid.size, '.png'), p1, width = 8, height = 6, units = 'in')
ggsave(paste0('python_curation_figs/CAMS_GFDL_CESM2-WACCM_som2_grid', grid.size, '.png'), p2, width = 8, height = 6, units = 'in')
plot(som.model,
     type = 'mapping',
     bg = som.events.colors,
     keepMargins = F,
     col = NA,
     main = '')

plot(som.model1,
     type = 'mapping',
     bg = som.events.colors1,
     keepMargins = F,
     col = NA,
     main = '')

Label observations

Let’s label each observation with its SOM classification

region_summary$model1_unitclass <- som.model$unit.classif
region_summary$model2_unitclass <- som.model1$unit.classif

region_summary %>%
  left_join(data.frame(model1_unitclass = 1:169) %>%
              mutate(color1 = som.events.colors),
            by='model1_unitclass') %>%
  left_join(data.frame(model2_unitclass = 1:169) %>%
              mutate(color2 = som.events.colors1),
            by='model2_unitclass')->
  region_data


print(region_data %>% filter(model1_unitclass == 17))
##  [1] esm              experiment       region           iasd            
##  [5] tas_change       pr_pct           r                g               
##  [9] b                lon              lat              orig_color      
## [13] model1_unitclass model2_unitclass color1           color2          
## <0 rows> (or 0-length row.names)

and the original color:

region_data %>%
  mutate(orig_color = rgb(.$r, .$g, .$b, maxColorValue  = 255)) ->
  tmp

ggplot(tmp) + geom_tile(mapping = aes( x = interaction(experiment, esm), y = region, fill = orig_color) ) +
  scale_fill_manual(values = tmp$orig_color) +
  theme(legend.position='none',
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  ggtitle('CAMS, GFDL, CESM2-WACCM original colors')

ggplot(tmp) + geom_tile(mapping = aes( x = interaction(experiment, esm), y = region, fill = color1) ) +
  scale_fill_manual(values = tmp$color1) +
  theme(legend.position='none',
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  ggtitle('CAMS, GFDL, CESM2-WACCM SOM1 colors')

ggplot(tmp) + geom_tile(mapping = aes( x = interaction(experiment, esm), y = region, fill = color2) ) +
  scale_fill_manual(values = tmp$color2) +
  theme(legend.position='none',
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  ggtitle('CAMS, GFDL, CESM2-WACCM SOM2 colors')

Maybe do it separately by SSP

Start with SSP126 low temperature changes so should be mostly blues

region_summary %>%
  filter(experiment == 'ssp126') ->
  region_summary_126

region_numerical <- as.data.frame(region_summary_126[c('lon', 'lat', 'r', "g", "b")])

SOM SSP126

set.seed(11)
#### train the SOM ####
sample.size <- nrow(region_numerical)
print(paste('Num Observations is', sample.size ))
## [1] "Num Observations is 129"
## define a grid for the SOM and train
grid.size <-  ceiling(sample.size ^ (1/2.5))
som.grid <- somgrid(xdim = grid.size, ydim = grid.size, topo = 'hexagonal', toroidal = F)
som.model <- som(data.matrix(region_numerical), grid = som.grid)
# extract some values to be useful
som.events <- som.model$codes[[1]]

# assign a color to each event because we have 3 metrics
# red = iasd
# green = temperature change
# blue = precip change
som.events.colors <- rgb( som.events[,3],                 
                          som.events[,4],               
                          som.events[,5],                 
                          maxColorValue = 255)

# calculate a distance matrix
som.dist <- as.matrix(dist(som.events))
p1<- ggplot() +
  geom_sf(data = shp  ) +
  geom_text(data = shp, mapping = aes(label = Acronym, x = mean_lon, y= mean_lat), size =3) +
  geom_point(data = as.data.frame(som.events), mapping = aes(x = lon, y = lat, color = interaction(r,g,b)), alpha = 0.75, size = 3) +
  scale_color_manual(values = som.events.colors) +
  theme(legend.position = 'none') + 
  ggtitle('CAMS, GFDL and CESM2-WACCM SSP126')
p1

plot(som.model,
     type = 'mapping',
     bg = som.events.colors,
     keepMargins = F,
     col = NA,
     main = '')

test the same thing again for robustness

#### train the SOM ####
sample.size <- nrow(region_numerical)
print(paste('Num Observations is', sample.size ))
## [1] "Num Observations is 129"
## define a grid for the SOM and train
grid.size <-  ceiling(sample.size ^ (1/2.5))
som.grid <- somgrid(xdim = grid.size, ydim = grid.size, topo = 'hexagonal', toroidal = F)
som.model1 <- som(data.matrix(region_numerical), grid = som.grid)
# extract some values to be useful
som.events1 <- som.model1$codes[[1]]

# assign a color to each event because we have 3 metrics
# red = iasd
# green = temperature change
# blue = precip change
som.events.colors1 <- rgb( som.events1[,3],                 
                          som.events1[,4],               
                          som.events1[,5],                 
                          maxColorValue = 255)

# calculate a distance matrix
som.dist1 <- as.matrix(dist(som.events1))
p2<- ggplot() +
  geom_sf(data = shp  ) +
  geom_text(data = shp, mapping = aes(label = Acronym, x = mean_lon, y= mean_lat), size =3) +
  geom_point(data = as.data.frame(som.events1), mapping = aes(x = lon, y = lat, color = interaction(r,g,b)), alpha = 0.75, size = 3) +
  scale_color_manual(values = som.events.colors1) +
  theme(legend.position = 'none')+ 
  ggtitle('CAMS, GFDL and CESM2-WACCM SSP126')
p1

p2

ggsave(paste0('python_curation_figs/CAMS_GFDL_CESM2-WACCM_som1_grid', grid.size, '_ssp126.png'), p1, width = 8, height = 6, units = 'in')
ggsave(paste0('python_curation_figs/CAMS_GFDL_CESM2-WACCM_som2_grid', grid.size, '_ssp126.png'), p2, width = 8, height = 6, units = 'in')
plot(som.model,
     type = 'mapping',
     bg = som.events.colors,
     keepMargins = F,
     col = NA,
     main = '')

plot(som.model1,
     type = 'mapping',
     bg = som.events.colors1,
     keepMargins = F,
     col = NA,
     main = '')

Same thing but with lat-lon labels

plot(som.model,
     type = 'mapping',
     bg = som.events.colors,
     keepMargins = F,
     col = NA,
     main = '')
dimnames(som.model$grid$pts) = list(paste0(as.character(round(as.data.frame(som.events)$lon,0)),
                                                '~',
                                                as.character(round(as.data.frame(som.events)$lat,0)))
                                                , c("lon","lat")) 
text(som.model$grid$pts, dimnames(som.model$grid$pts)[[1]])

plot(som.model1,
     type = 'mapping',
     bg = som.events.colors1,
     keepMargins = F,
     col = NA,
     main = '')
dimnames(som.model1$grid$pts) = list(paste0(as.character(round(as.data.frame(som.events1)$lon,0)),
                                                '~',
                                                as.character(round(as.data.frame(som.events1)$lat,0)))
                                                , c("lon","lat")) 
text(som.model1$grid$pts, dimnames(som.model1$grid$pts)[[1]])

Just CAMS

Start with CAMS low temperature changes so should be mostly blues

region_summary %>%
  filter(esm == 'CAMS-CSM1-0') ->
  region_summary_cams

region_numerical <- as.data.frame(region_summary_cams[c('lon', 'lat', 'r', "g", "b")])

SOM CAMS

set.seed(11)
#### train the SOM ####
sample.size <- nrow(region_numerical)
print(paste('Num Observations is', sample.size ))
## [1] "Num Observations is 172"
## define a grid for the SOM and train
grid.size <-  ceiling(sample.size ^ (1/2.5))
som.grid <- somgrid(xdim = grid.size, ydim = grid.size, topo = 'hexagonal', toroidal = F)
som.model.cams <- som(data.matrix(region_numerical), grid = som.grid)
# extract some values to be useful
som.events.cams <- som.model.cams$codes[[1]]

# assign a color to each event because we have 3 metrics
# red = iasd
# green = temperature change
# blue = precip change
som.events.colors.cams <- rgb( som.events.cams[,3],                 
                          som.events.cams[,4],               
                          som.events.cams[,5],                 
                          maxColorValue = 255)

# calculate a distance matrix
som.dist.cams <- as.matrix(dist(som.events.cams))
p1<- ggplot() +
  geom_sf(data = shp  ) +
  geom_text(data = shp, mapping = aes(label = Acronym, x = mean_lon, y= mean_lat), size =3) +
  geom_point(data = as.data.frame(som.events.cams), mapping = aes(x = lon, y = lat, color = interaction(r,g,b)), alpha = 0.75, size = 3) +
  scale_color_manual(values = som.events.colors.cams) +
  theme(legend.position = 'none') + 
  ggtitle('CAMS')
p1

plot(som.model.cams,
     type = 'mapping',
     bg = som.events.colors.cams,
     keepMargins = F,
     col = NA,
     main = '')

test the same thing again for robustness

#### train the SOM ####
sample.size <- nrow(region_numerical)
print(paste('Num Observations is', sample.size ))
## [1] "Num Observations is 172"
## define a grid for the SOM and train
grid.size <-  ceiling(sample.size ^ (1/2.5))
som.grid <- somgrid(xdim = grid.size, ydim = grid.size, topo = 'hexagonal', toroidal = F)
som.model1.cams <- som(data.matrix(region_numerical), grid = som.grid)
# extract some values to be useful
som.events1.cams <- som.model1.cams$codes[[1]]

# assign a color to each event because we have 3 metrics
# red = iasd
# green = temperature change
# blue = precip change
som.events.colors1.cams <- rgb( som.events1.cams[,3],                 
                          som.events1.cams[,4],               
                          som.events1.cams[,5],                 
                          maxColorValue = 255)

# calculate a distance matrix
som.dist1.cams <- as.matrix(dist(som.events1.cams))
p2<- ggplot() +
  geom_sf(data = shp  ) +
  geom_text(data = shp, mapping = aes(label = Acronym, x = mean_lon, y= mean_lat), size =3) +
  geom_point(data = as.data.frame(som.events1.cams), mapping = aes(x = lon, y = lat, color = interaction(r,g,b)), alpha = 0.75, size = 3) +
  scale_color_manual(values = som.events.colors1.cams) +
  theme(legend.position = 'none')+ 
  ggtitle('CAMS')
p1

p2

ggsave(paste0('python_curation_figs/CAMS_som1_grid', grid.size, '.png'), p1, width = 8, height = 6, units = 'in')
ggsave(paste0('python_curation_figs/CAMS_som2_grid', grid.size, '.png'), p2, width = 8, height = 6, units = 'in')
plot(som.model.cams,
     type = 'mapping',
     bg = som.events.colors.cams,
     keepMargins = F,
     col = NA,
     main = '')

plot(som.model1.cams,
     type = 'mapping',
     bg = som.events.colors1.cams,
     keepMargins = F,
     col = NA,
     main = '')

Just CESM2

region_summary %>%
  filter(esm == 'CESM2-WACCM') ->
  region_summary_cesm

region_numerical <- as.data.frame(region_summary_cesm[c('lon', 'lat', 'r', "g", "b")])

SOM cesm

set.seed(11)
#### train the SOM ####
sample.size <- nrow(region_numerical)
print(paste('Num Observations is', sample.size ))
## [1] "Num Observations is 172"
## define a grid for the SOM and train
grid.size <-  ceiling(sample.size ^ (1/2.5))
som.grid <- somgrid(xdim = grid.size, ydim = grid.size, topo = 'hexagonal', toroidal = F)
som.model.cesm <- som(data.matrix(region_numerical), grid = som.grid)
# extract some values to be useful
som.events.cesm <- som.model.cesm$codes[[1]]

# assign a color to each event because we have 3 metrics
# red = iasd
# green = temperature change
# blue = precip change
som.events.colors.cesm <- rgb( som.events.cesm[,3],                 
                          som.events.cesm[,4],               
                          som.events.cesm[,5],                 
                          maxColorValue = 255)

# calculate a distance matrix
som.dist.cesm <- as.matrix(dist(som.events.cesm))
p1<- ggplot() +
  geom_sf(data = shp  ) +
  geom_text(data = shp, mapping = aes(label = Acronym, x = mean_lon, y= mean_lat), size =3) +
  geom_point(data = as.data.frame(som.events.cesm), mapping = aes(x = lon, y = lat, color = interaction(r,g,b)), alpha = 0.75, size = 3) +
  scale_color_manual(values = som.events.colors.cesm) +
  theme(legend.position = 'none') + 
  ggtitle('cesm')
p1

plot(som.model.cesm,
     type = 'mapping',
     bg = som.events.colors.cesm,
     keepMargins = F,
     col = NA,
     main = '')

test the same thing again for robustness

#### train the SOM ####
sample.size <- nrow(region_numerical)
print(paste('Num Observations is', sample.size ))
## [1] "Num Observations is 172"
## define a grid for the SOM and train
grid.size <-  ceiling(sample.size ^ (1/2.5))
som.grid <- somgrid(xdim = grid.size, ydim = grid.size, topo = 'hexagonal', toroidal = F)
som.model1.cesm <- som(data.matrix(region_numerical), grid = som.grid)
# extract some values to be useful
som.events1.cesm <- som.model1.cesm$codes[[1]]

# assign a color to each event because we have 3 metrics
# red = iasd
# green = temperature change
# blue = precip change
som.events.colors1.cesm <- rgb( som.events1.cesm[,3],                 
                          som.events1.cesm[,4],               
                          som.events1.cesm[,5],                 
                          maxColorValue = 255)

# calculate a distance matrix
som.dist1.cesm <- as.matrix(dist(som.events1.cesm))
p2<- ggplot() +
  geom_sf(data = shp  ) +
  geom_text(data = shp, mapping = aes(label = Acronym, x = mean_lon, y= mean_lat), size =3) +
  geom_point(data = as.data.frame(som.events1.cesm), mapping = aes(x = lon, y = lat, color = interaction(r,g,b)), alpha = 0.75, size = 3) +
  scale_color_manual(values = som.events.colors1.cesm) +
  theme(legend.position = 'none')+ 
  ggtitle('cesm')
p1

p2

ggsave(paste0('python_curation_figs/cesm_som1_grid', grid.size, '.png'), p1, width = 8, height = 6, units = 'in')
ggsave(paste0('python_curation_figs/cesm_som2_grid', grid.size, '.png'), p2, width = 8, height = 6, units = 'in')

# dig in comparison

  • Some points are very similar like (46, 14) and (46,13), they are just different SOMs ordered

  • some are really not (like the mustard points (35, -24) vs (-59,5))

  • distill into other clusters and see how regions get assigned then plot map by ESM and SSP of those new clusters to see if consistent?

  • bright green/teal at (65, 59) actually kind of consistent between the two models

plot(som.model.cesm,
     type = 'mapping',
     bg = som.events.colors.cesm,
     keepMargins = F,
     col = NA,
     main = '')
dimnames(som.model.cesm$grid$pts) = list(paste0(as.character(round(as.data.frame(som.events.cesm)$lon,0)),
                                                '~',
                                                as.character(round(as.data.frame(som.events.cesm)$lat,0)))
                                                , c("lon","lat")) 
text(som.model.cesm$grid$pts, dimnames(som.model.cesm$grid$pts)[[1]])

CAMS for comparison

clustering

So for each ESM, some of the events are consistent between estimated models and some aren’t

Maybe if we cluster the outputs, it’ll be distilled to the robust part. Then for each ESM-SSP, we can make a map with each region filled in with the coarse cluster value

how many clusters CESM

#### look for a reasonable number of clusters ####

## Evaluate within cluster distances for different values of k.  This is
## more dependent on the number of map units in the SOM than the structure
## of the underlying data, but until we have a better way...

## Define a function to calculate mean distance within each cluster.  This
## is roughly analogous to the within clusters ss approach

try.k <- 2:60

clusterMeanDist <- function(clusters, som.dist){
  cluster.means = c()
  
  for(c in unique(clusters)){
    temp.members <- which(clusters == c)
    
    if(length(temp.members) > 1){
      temp.dist <- som.dist[temp.members,]
      temp.dist <- temp.dist[,temp.members]
      cluster.means <- append(cluster.means, mean(temp.dist))
    }else(cluster.means <- 0)
  }
  
  return(mean(cluster.means))
  
}

num_clusters <- function(events){
  try.k <- 2:60
  cluster.dist.eval <- as.data.frame(matrix(ncol = 3, nrow = (length(try.k))))
  colnames(cluster.dist.eval) <- c('k', 'kmeans', 'hclust')
  
  som.dist <- as.matrix(dist(events))
  
  for(i in 1:length(try.k)) {
    cluster.dist.eval[i, 'k'] <- try.k[i]
    cluster.dist.eval[i, 'kmeans'] <- clusterMeanDist(kmeans(events, centers = try.k[i], iter.max = 20)$cluster,
                                                      som.dist=som.dist)
    cluster.dist.eval[i, 'hclust'] <- clusterMeanDist(cutree(hclust(vegdist(events)), k = try.k[i]),
                                                      som.dist=som.dist)
  }
  
  return(cluster.dist.eval)
}


cesm1_cluster <- suppressWarnings(num_clusters(events=som.events.cesm))

plot(cesm1_cluster[, 'kmeans'] ~ try.k,
     type = 'l')

lines(cesm1_cluster[, 'hclust'] ~ try.k,
      col = 'red')
       
legend('topright',
       legend = c('k-means', 'hierarchical'),
       col = c('black', 'red'),
       lty = c(1, 1))

cesm2_cluster <- suppressWarnings(num_clusters(events=som.events1.cesm))

plot(cesm2_cluster[, 'kmeans'] ~ try.k,
     type = 'l')

lines(cesm2_cluster[, 'hclust'] ~ try.k,
      col = 'red')
       
legend('topright',
       legend = c('k-means', 'hierarchical'),
       col = c('black', 'red'),
       lty = c(1, 1))

print(cesm1_cluster[1:20,])
##     k    kmeans    hclust
## 1   2 115.09368 116.88609
## 2   3 101.04137  87.69923
## 3   4  89.26251  70.29772
## 4   5  79.09651  66.54426
## 5   6  71.74314  66.85624
## 6   7  68.25380  36.65696
## 7   8  62.54271  41.18593
## 8   9  58.10822  54.27531
## 9  10  56.97680  54.27531
## 10 11  53.30057  50.90877
## 11 12  47.20858  46.31386
## 12 13  48.06856  50.17945
## 13 14  46.38431  50.17945
## 14 15   0.00000  45.87915
## 15 16  39.80492  45.87915
## 16 17  38.80640  45.87915
## 17 18  38.11367  45.04198
## 18 19   0.00000  45.04198
## 19 20   0.00000  41.68734
## 20 21  30.61355  38.38915
print(cesm2_cluster[1:20,])
##     k    kmeans   hclust
## 1   2 114.78908 84.38540
## 2   3  99.46178 87.38900
## 3   4  84.44001 75.01906
## 4   5  79.21061 71.42349
## 5   6  72.70291 64.65223
## 6   7  66.30695  0.00000
## 7   8  59.32091  0.00000
## 8   9  54.84698  0.00000
## 9  10  55.00760  0.00000
## 10 11  52.19175  0.00000
## 11 12  49.97845  0.00000
## 12 13  47.00709  0.00000
## 13 14  45.16438  0.00000
## 14 15  43.52692  0.00000
## 15 16  40.21425  0.00000
## 16 17  39.61848  0.00000
## 17 18  37.02584  0.00000
## 18 19  35.83780  0.00000
## 19 20   0.00000  0.00000
## 20 21  18.33370  0.00000

6 clusters seems good for CESM

num clusters CAMS

cams1_cluster <- suppressWarnings(num_clusters(events=som.events.cams))

plot(cams1_cluster[, 'kmeans'] ~ try.k,
     type = 'l')

lines(cams1_cluster[, 'hclust'] ~ try.k,
      col = 'red')
       
legend('topright',
       legend = c('k-means', 'hierarchical'),
       col = c('black', 'red'),
       lty = c(1, 1))

cams2_cluster <- suppressWarnings(num_clusters(events=som.events1.cams))

plot(cams2_cluster[, 'kmeans'] ~ try.k,
     type = 'l')

lines(cams2_cluster[, 'hclust'] ~ try.k,
      col = 'red')
       
legend('topright',
       legend = c('k-means', 'hierarchical'),
       col = c('black', 'red'),
       lty = c(1, 1))

print(cams1_cluster[1:20,])
##     k   kmeans    hclust
## 1   2 96.63535 115.79397
## 2   3 85.10094  85.77567
## 3   4 65.03710  67.26459
## 4   5 57.65432  62.58544
## 5   6 52.54121  54.85245
## 6   7 48.61955  43.31569
## 7   8 46.93191  35.35015
## 8   9 43.23723  35.35015
## 9  10 41.50743  34.90566
## 10 11 40.21328  34.90566
## 11 12 35.36624  29.52258
## 12 13 35.32931  30.07854
## 13 14 33.88767  30.39190
## 14 15 31.68207  29.88608
## 15 16 31.38483  27.34743
## 16 17 29.87922  27.45862
## 17 18 29.09894  26.22720
## 18 19  0.00000  27.35123
## 19 20 21.80350  27.35123
## 20 21 24.94992  26.52608
print(cams2_cluster[1:20,])
##     k   kmeans    hclust
## 1   2 97.88362 103.47237
## 2   3 84.62660  67.66945
## 3   4 65.41690  57.17970
## 4   5 61.09000  54.13661
## 5   6 53.88740  53.04385
## 6   7 50.31881  49.64488
## 7   8 47.12152  43.19777
## 8   9 44.88431  39.30754
## 9  10 42.72430  39.30754
## 10 11 39.28659  33.77423
## 11 12 37.44854  33.77423
## 12 13 35.88324  39.78227
## 13 14 34.36304  39.78227
## 14 15 32.11478  39.78227
## 15 16 30.58219  39.78227
## 16 17  0.00000  38.61507
## 17 18 27.91163  35.32840
## 18 19 27.26736  28.61386
## 19 20 26.24581  28.98457
## 20 21  0.00000  28.98457

7 clusters seems ok for cams - maybe 6 would be as well

CESM clusters

#### evaluate clustering algorithms ####

## Having selected a reasonable value for k, evaluate different clustering algorithms.

## Define a function for make a simple plot of clustering output.
## This is the same as previousl plotting, but we define the function
## here as we wanted to play with the color earlier.

plotSOM <- function(som_model, som_colors, clusters){
  plot(som_model,
       type = 'mapping',
       bg = som_colors,
       keepMargins = F,
       col = NA)
  
  add.cluster.boundaries(som_model, clusters)
}

## Try several different clustering algorithms, and, if desired, different values for k

cluster.tries <- list()

for(k in c(6)){
 
  ## k-means clustering
  
  som.cluster.k <- kmeans(som.events.cesm, centers = k, iter.max = 100, nstart = 10)$cluster # k-means
  
  ## hierarchical clustering
  
  som.dist <- dist(som.events.cesm) # hierarchical, step 1
  som.cluster.h <- cutree(hclust(som.dist), k = k) # hierarchical, step 2
  
  ## capture outputs
  cluster.tries[[paste0('som.cluster.k.', k)]] <- som.cluster.k
  cluster.tries[[paste0('som.cluster.h.', k)]] <- som.cluster.h
}

plotSOM(som_model = som.model.cesm, som_colors = som.events.colors.cesm, clusters = cluster.tries$som.cluster.k.6)

plotSOM(som_model = som.model.cesm, som_colors = som.events.colors.cesm, clusters = cluster.tries$som.cluster.h.6)

region_summary_cesm$model1_unitclass <- som.model.cesm$unit.classif

region_summary_cesm %>%
    left_join(data.frame(model1_unitclass = 1:64, k_cluster1 = cluster.tries$som.cluster.k.6), by = 'model1_unitclass') %>%
    left_join(data.frame(model1_unitclass = 1:64, h_cluster1 = cluster.tries$som.cluster.h.6), by = 'model1_unitclass')->
    clustered
## Try several different clustering algorithms, and, if desired, different values for k

cluster.tries <- list()

for(k in c(6)){
 
  ## k-means clustering
  
  som.cluster.k <- kmeans(som.events1.cesm, centers = k, iter.max = 100, nstart = 10)$cluster # k-means
  
  ## hierarchical clustering
  
  som.dist <- dist(som.events1.cesm) # hierarchical, step 1
  som.cluster.h <- cutree(hclust(som.dist), k = k) # hierarchical, step 2
  
  ## capture outputs
  cluster.tries[[paste0('som.cluster.k.', k)]] <- som.cluster.k
  cluster.tries[[paste0('som.cluster.h.', k)]] <- som.cluster.h
}

plotSOM(som_model = som.model1.cesm, som_colors = som.events.colors1.cesm, clusters = cluster.tries$som.cluster.k.6)

plotSOM(som_model = som.model1.cesm, som_colors = som.events.colors1.cesm, clusters = cluster.tries$som.cluster.h.6)

clustered$model2_unitclass <- som.model1.cesm$unit.classif

clustered %>%
    left_join(data.frame(model2_unitclass = 1:64, k_cluster2 = cluster.tries$som.cluster.k.6), by = 'model2_unitclass') %>%
  left_join(data.frame(model2_unitclass = 1:64, h_cluster2 = cluster.tries$som.cluster.h.6), by = 'model2_unitclass')->
    clustered

shp %>% 
    left_join(clustered, by = c('Acronym' = 'region')) ->
    shp1

CESM 126

ggplot() +
    geom_sf(data = shp1 %>% filter( experiment == 'ssp126'), aes(fill = as.factor(k_cluster1)) ) 

ggplot() +
    geom_sf(data = shp1 %>% filter( experiment == 'ssp126'), aes(fill = as.factor(k_cluster2)) ) 

ggplot() +
    geom_sf(data = shp1 %>% filter( experiment == 'ssp126'), aes(fill = as.factor(h_cluster1)) ) 

ggplot() +
    geom_sf(data = shp1 %>% filter( experiment == 'ssp126'), aes(fill = as.factor(h_cluster2)) ) 

## CESM 245

ggplot() +
    geom_sf(data = shp1 %>% filter( experiment == 'ssp245'), aes(fill = as.factor(k_cluster1)) ) 

ggplot() +
    geom_sf(data = shp1 %>% filter( experiment == 'ssp245'), aes(fill = as.factor(k_cluster2)) ) 

ggplot() +
    geom_sf(data = shp1 %>% filter( experiment == 'ssp245'), aes(fill = as.factor(h_cluster1)) ) 

ggplot() +
    geom_sf(data = shp1 %>% filter( experiment == 'ssp245'), aes(fill = as.factor(h_cluster2)) ) 

## CESM 370

ggplot() +
    geom_sf(data = shp1 %>% filter( experiment == 'ssp370'), aes(fill = as.factor(k_cluster1)) ) 

ggplot() +
    geom_sf(data = shp1 %>% filter( experiment == 'ssp370'), aes(fill = as.factor(k_cluster2)) ) 

ggplot() +
    geom_sf(data = shp1 %>% filter( experiment == 'ssp370'), aes(fill = as.factor(h_cluster1)) ) 

ggplot() +
    geom_sf(data = shp1 %>% filter( experiment == 'ssp370'), aes(fill = as.factor(h_cluster2)) ) 

One clustering, all ssps

ggplot() +
    geom_sf(data = shp1 %>% na.omit, aes(fill = as.factor(k_cluster1)) ) +
  facet_wrap(~experiment, nrow =2 )

ggplot() +
    geom_sf(data = shp1 %>% na.omit, aes(fill = as.factor(k_cluster2)) ) +
  facet_wrap(~experiment, nrow =2 )

## Clustering conclusions

Abstracts TOO much and is meaningless: all 6 clusters show up for each SSP, which means we aren’t extracting significant insight.

Plot maps with SOMs

By ESM and SSP

Because we put the data all on the same scale initially, a blue in CAMs SOM is the same as the same blue in the CESM som.

The HARD part is getting the SOM-specific colors on the maps

region_summary_cesm$model1_unitclass <- som.model.cesm$unit.classif
region_summary_cesm$model2_unitclass <- som.model1.cesm$unit.classif

shp %>% 
    left_join(region_summary_cesm, by = c('Acronym' = 'region')) ->
    shp_cesm
ggplot() +
    geom_sf(data = shp_cesm %>% na.omit, aes(fill = as.factor(model1_unitclass)) ) +
  scale_fill_manual(values = som.events.colors.cesm)+
  facet_wrap(~experiment, nrow =2 ) + 
  ggtitle('CESM, model1')

ggplot() +
    geom_sf(data = shp_cesm %>% na.omit, aes(fill = as.factor(model2_unitclass)) ) +
  scale_fill_manual(values = som.events.colors1.cesm)+
  facet_wrap(~experiment, nrow =2 ) + 
  ggtitle('CESM, model2')

# not evaluated

arrange by lon-lat and plot

plotter <- function(events, colors){
  
  data <- as.data.frame(events)
  data$color <- colors
  
  lons <- data.frame('lon' = sort(unique(data$lon))) %>% mutate(lon_order = as.integer(row.names(.)))
  lats <- data.frame('lat' = sort(unique(data$lat))) %>% mutate(lat_order = as.integer(row.names(.)))
  
  data %>%
    left_join(lons, by = 'lon') %>%
    left_join(lats, by = 'lat') %>%
    arrange(lon, lat) %>%
    mutate(plot_order = as.integer(row.names(.))) ->
    data_plot
  
  p <- ggplot(cesm1_plot) + 
    geom_point(mapping = aes(x = lon_order, y = 1, color = color), size = 3) +
    facet_wrap(~plot_order, nrow = sqrt(nrow(data))) +
    scale_color_manual(values = data_plot$color) +
    theme_bw()+
    theme(legend.position = 'none',
          strip.background = element_blank(),
          strip.text.x = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank()) 
  return(p)
  
}
cesm_1<- as.data.frame(som.events.cesm)
cesm_1$color <- som.events.colors.cesm



lons <- data.frame('lon' = sort(unique(cesm_1$lon))) %>% mutate(lon_order = as.integer(row.names(.)))
lats <- data.frame('lat' = sort(unique(cesm_1$lat))) %>% mutate(lat_order = as.integer(row.names(.)))

cesm_1 %>%
  left_join(lons, by = 'lon') %>%
  left_join(lats, by = 'lat') %>%
  arrange(lon, lat) %>%
  mutate(plot_order = as.integer(row.names(.))) ->
  cesm1_plot

p1<- ggplot(cesm1_plot) + 
  geom_point(mapping = aes(x = lon_order, y = lat_order, color = color), size = 3) +
  facet_wrap(~lat_order, nrow = 8) +
  #facet_grid(lon_order~lat_order) +
  scale_color_manual(values = cesm1_plot$color) +
  theme_bw()+
  theme(legend.position = 'none',
        strip.background = element_blank(),
        strip.text.x = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) 

p <- plotter(events = som.events.cesm, colors = som.events.colors.cesm)
p
p1
cesm_2<- as.data.frame(som.events1.cesm)
cesm_2$color <- som.events.colors1.cesm


cams_1<- as.data.frame(som.events.cesm)
cams_1$color <- som.events.colors.cesm

cesm_2<- as.data.frame(som.events1.cesm)
cesm_2$color <- som.events.colors1.cesm

Grid and Challenge

Haven’t figured out if the same ‘seafoam’ event in the CESM grid is actually the same underlying lat lon and it’s just ignoring lat lon for plotting. The maps I have plotted make me think no

ALSO the kohonen package only comes with toroidal and non-toroidal grids. TO equate -180 and 180 lon, have to equate -90 and 90 lat which is for sure wrong

not averaging the ensemble